---
title: Clean Data - PureSpectrum Survey
author: Weifang Xu, Taylor Chewning, and Qing Wang
date: August 6, 2024
output: pdf_document
fontsize: 11pt
header-includes:
  \usepackage[T1]{fontenc}
  \usepackage[utf8]{inputenc}
  \usepackage{newpxtext,newpxmath}
---

```{r setup, include=FALSE}
# Default chunk option for the whole document: echo = TRUE prints each chunk's
# source code alongside its output in the knitted file.
knitr::opts_chunk$set(echo = TRUE)
```


```{r}
## Point the session at the replication folder
# The workspace is intentionally NOT wiped with rm(list = ls()): that call
# deletes the caller's objects without resetting loaded packages or options,
# so it does not give a clean state. Restart R for a truly fresh session.
#
# The folder can be overridden on another machine by setting the
# DATA_REPLICATION_DIR environment variable; the default is the original
# author's location.
data_dir <- Sys.getenv(
  "DATA_REPLICATION_DIR",
  unset = "/Users/qingwang/Downloads/Data Replication"
)
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  # Fall back to the current directory instead of aborting, so the relative
  # paths used below still work when this file sits in the replication folder.
  message(
    "Replication folder not found: ", data_dir,
    "\nRelative paths will be resolved against: ", getwd()
  )
}

# load the libraries
library(tidyverse)
library(haven)
library(dplyr)

# Read the raw survey export, then keep completed interviews only
resurvey <- read_csv("PureSpectrum/data_PureSpectrum.csv")
resurvey <- filter(resurvey, Progress == 100) # drop partial responses
glimpse(resurvey) # print a column overview of the retained rows

# Build the analysis data set from the completed responses: outcome measures,
# treatment indicators, demographic controls, party identification and
# dispositional scales. Codes that match no listed category become NA.
df <- resurvey %>%
  # Outcomes and experimental conditions
  mutate(
    attack = ifelse(alliance_DV1 < 3, 100, 0), # 100 = favor (responses 1-2)
    # reverse code and rescale in one step: response 1 -> 100, response 5 -> 0,
    # so higher values = more support for attack
    attack_cont = (5 - alliance_DV1) * 25,
    alliance = ifelse(exp_4 > 2, 1, 0), # 1 = alliance
    hmrts = ifelse(exp_4 == 1 | exp_4 == 3, 1, 0) # 1 = violate
  ) %>%
  # Demographic controls
  mutate(
    male = ifelse(sex == 1, 1, 0),
    # collapse the two lowest education codes into one level (4 levels total)
    edu4 = case_when(
      educ %in% c(1, 2) ~ 1,
      educ == 3 ~ 2,
      educ == 4 ~ 3,
      educ == 5 ~ 4
    ),
    edu4 = (edu4 - 1) / 3, # rescale education to 0-1
    white = ifelse(race == 1, 1, 0),
    # Convert to continuous: average of age categories, or min
    age_cat = case_when(
      age == 1 ~ 23.5,
      age == 2 ~ 34.5,
      age == 3 ~ 44.5,
      age == 4 ~ 54.5,
      age == 5 ~ 64.5,
      age == 6 ~ 70
    ),
    # average of income categories, min and max
    inc = case_when(
      income == 1 ~ 30000,
      income == 2 ~ 50000,
      income == 3 ~ 85000,
      income == 4 ~ 150000,
      income == 5 ~ 200000
    )
  ) %>%
  mutate(inc_10k = inc / 10000) %>% # convert the inc variable unit ($ to 10k$)
  # create 3-cat party: codes 3 and 4 are both treated as Independent
  mutate(
    party = case_when(
      pid_1 == 1 ~ "Republican",
      pid_1 == 2 ~ "Democrat",
      pid_1 %in% c(3, 4) ~ "Independent"
    )
  ) %>%
  # Create 7-category party from the three follow-up questions
  mutate(
    pid7_dem = case_when(
      pid_2d == 1 ~ "Strong Democrat",
      pid_2d == 2 ~ "Not very strong Democrat"
    ),
    pid7_rep = case_when(
      pid_2r == 1 ~ "Strong Republican",
      pid_2r == 2 ~ "Not very strong Republican"
    ),
    # NOTE(review): code 3 of pid_2i is not mapped and becomes NA - confirm
    # against the codebook that this is intended.
    pid7_ind = case_when(
      pid_2i == 1 ~ "The Republican Party",
      pid_2i == 2 ~ "The Democratic Party",
      pid_2i == 4 ~ "Neither"
    )
  ) %>%
  # first non-missing follow-up answer, checked in the order dem, ind, rep
  mutate(pid7_temp = coalesce(pid7_dem, pid7_ind, pid7_rep)) %>%
  # pid7 is kept as a character code "1" (Strong Democrat) to
  # "7" (Strong Republican); unmatched values pass through unchanged
  mutate(
    pid7 = case_when(
      pid7_temp == "Strong Democrat" ~ "1",
      pid7_temp == "Not very strong Democrat" ~ "2",
      pid7_temp == "The Democratic Party" ~ "3",
      pid7_temp == "Neither" ~ "4",
      pid7_temp == "The Republican Party" ~ "5",
      pid7_temp == "Not very strong Republican" ~ "6",
      pid7_temp == "Strong Republican" ~ "7",
      TRUE ~ pid7_temp
    )
  ) %>%
  # rescale dispositional variables: higher values = higher
  # nationalism/patriotism
  mutate(
    nationalism_rc = 6 - nationalism,
    patriotism_rc = 5 - patriotism,
    # take average of 4 cooperative internationalism measures
    coop_int = (coop_int_1 + coop_int_2 + coop_int_3 + coop_int_4) / 4
  )

######### Mediator variables, each expressed on a 0-100 scale
df_mediate <- df %>%
  # First pass: plain item averages for the three multi-item mediators
  mutate(
    threat = (alliance_DV4_1 + alliance_DV4_2 +
      alliance_DV4_3 + alliance_DV4_4) / 4,
    success = (alliance_DV5_1 + alliance_DV5_2) / 2,
    cost = (alliance_DV5_3 + alliance_DV5_4 +
      alliance_DV5_5 + alliance_DV5_6) / 4
  ) %>%
  # Second pass: linear stretch (an average of 1 becomes 0, an average of 5
  # becomes 100)
  mutate(
    threat = (threat * 25) - 25,
    success = (success * 25) - 25,
    cost = (cost * 25) - 25
  ) %>%
  # Single-item moral judgements and their composite
  mutate(
    oblig = case_when(
      alliance_DV2 == 1 ~ 100, # 100 = US has moral obligation
      alliance_DV2 == 2 ~ 50,
      alliance_DV2 == 3 ~ 0
    ),
    immoral = case_when(
      alliance_DV3 == 2 ~ 0,
      alliance_DV3 == 1 ~ 100 # 100 = morally wrong for US to attack
    ),
    # 100 = moral to attack, 0 = immoral to attack
    moral = (100 + oblig - immoral) / 2
  )

# Saving is currently disabled. To write the cleaned data, append this step
# to the pipeline above:
# %>% write_rds("PureSpectrum/clean_data_PureSpectrum.rds")

# Print a column overview of the final data set
glimpse(df_mediate)
```

